#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2015-07-14 17:24:40
# Project: hj_tianjin_tjhbj

from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """pyspider handler for administrative-penalty notices on www.tjhb.gov.cn.

    Flow: ``on_start`` queues three listing pages, ``index_page`` queues
    every link found in each listing, and ``detail_page`` turns a fetched
    notice into a result dict.
    """

    # NOTE(review): "itag" is presumably pyspider's task version marker
    # (bumping it would force already-seen pages to be re-processed) --
    # confirm against the pyspider documentation.
    crawl_config = {
        "itag": "v223",
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the listing pages; decorated with ``every`` at 24 * 60 minutes."""
        listing_urls = (
            'http://www.tjhb.gov.cn/env/supervised_pollution_info/administrative_penalty/decided_punishment/',
            'http://www.tjhb.gov.cn/env/supervised_pollution_info/administrative_penalty/environmentalviolations_timelimit_decision/',
            'http://www.tjhb.gov.cn/env/environmental_monitoring/administrative_penalty_information/',
        )
        for listing_url in listing_urls:
            self.crawl(listing_url, callback=self.index_page)

    # age is 10 days expressed in seconds; presumably the period during
    # which a fetched listing is considered fresh -- TODO confirm.
    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue a detail-page crawl for each anchor in the listing.

        Anchors are selected with ``#futuresubject>li>a``; each anchor's
        ``href`` is handed to ``self.crawl`` with ``detail_page`` as the
        callback.
        """
        anchors = response.doc('#futuresubject>li>a')
        for anchor in anchors.items():
            target = anchor.attr.href
            self.crawl(target, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Build the result record for a single notice page.

        Returns a dict with the page URL, the text of ``.pages-title``,
        ``.pages_print`` and ``.pages_content``, plus fixed site metadata.
        One specific block of MS-Word CSS text is removed from the content
        when it appears verbatim.
        """
        # CSS rules (apparently left over from a Word export) that are
        # stripped from the content text. Adjacent literals are joined at
        # compile time, so this is a single string.
        word_css = (
            'P.MsoNormal {\n'
            '\tFONT-SIZE: 10.5pt; FONT-FAMILY: "Times New Roman","serif"; TEXT-ALIGN: justify; MARGIN: 0cm 0cm 0pt; TEXT-JUSTIFY: inter-ideograph\n'
            '}\n'
            'LI.MsoNormal {\n'
            '\tFONT-SIZE: 10.5pt; FONT-FAMILY: "Times New Roman","serif"; TEXT-ALIGN: justify; MARGIN: 0cm 0cm 0pt; TEXT-JUSTIFY: inter-ideograph\n'
            '}\n'
            'DIV.MsoNormal {\n'
            '\tFONT-SIZE: 10.5pt; FONT-FAMILY: "Times New Roman","serif"; TEXT-ALIGN: justify; MARGIN: 0cm 0cm 0pt; TEXT-JUSTIFY: inter-ideograph\n'
            '}\n'
            '.TRS_PreAppend .MsoChpDefault {\n'
            '\tFONT-SIZE: 10pt\n'
            '}\n'
            '.TRS_PreAppend DIV.WordSection1 {\n'
            '\tpage: WordSection1\n'
            '}'
        )

        page_url = response.url
        title = response.doc('.pages-title').text()
        published = response.doc('.pages_print').text()
        raw_content = response.doc('.pages_content').text()
        content = raw_content.replace(word_css, '')

        record = {
            "url": page_url,
            "title": title,
            "pubdate": published,
            "content": content,
            # "Tianjin Environmental Protection Bureau"
            "site_name": u"天津市环保局",
            # "environment"
            "type": u"环境",
            # "Tianjin"
            "area": u"天津",
        }
        return record
